#!/usr/bin/env python
# -*- coding:utf-8 -*-
from selenium import webdriver
import time
import lxml
from bs4 import BeautifulSoup
from pyvirtualdisplay import Display

# Start a non-visible 800x600 virtual display at import time so the browser
# launched below has a screen to draw on. get_content() stops it again.
display = Display(size=(800, 600), visible=0)
display.start()

def driver_open():
    """Start a new Firefox WebDriver session and return the driver object."""
    return webdriver.Firefox()


def get_content(browser, url, wait_seconds=5):
    """Load ``url`` in ``browser`` and return the page parsed by BeautifulSoup.

    The page title is printed right after navigation. The function then
    sleeps for ``wait_seconds`` before reading the page source (presumably to
    give the page's scripts time to finish rendering).

    The browser is always quit and the module-level virtual ``display`` is
    always stopped before this function returns or raises, so neither can be
    reused after the call.

    Args:
        browser: Selenium WebDriver instance to navigate with.
        url: Address of the page to load.
        wait_seconds: Seconds to sleep between loading the page and reading
            its source. Defaults to 5, the previously hard-coded value.

    Returns:
        A ``BeautifulSoup`` object built with the ``lxml`` parser from the
        UTF-8 encoded page source.
    """
    try:
        browser.get(url)
        print(browser.title)

        time.sleep(wait_seconds)
        content = browser.page_source.encode('utf-8')
    finally:
        # Release resources even when navigation or reading the source
        # fails; otherwise the browser process and the virtual display would
        # be left running. quit() already closes every window, so a separate
        # close() beforehand is unnecessary.
        try:
            browser.quit()
        finally:
            display.stop()
    return BeautifulSoup(content, 'lxml')


def get_basic_info(soup):
    """Print the registration status and identification codes found in ``soup``.

    Three lines are printed: the company status (text of the first
    ``.td-regStatus-value > p`` element with newlines and spaces removed),
    the organisation code (4th ``.basic-td > .c8 > .ng-binding`` element) and
    the unified social credit code (8th such element). All values are
    extracted before anything is printed, so a failure produces no partial
    output.

    Args:
        soup: Parsed page. Only its ``select(css_selector)`` method is used,
            and every selected element only needs a ``text`` attribute.

    Raises:
        IndexError: If the status element is missing or fewer than eight
            basic-info fields are present (e.g. the page layout changed or
            the page did not load).
    """
    status_nodes = soup.select('.td-regStatus-value > p ')
    if not status_nodes:
        raise IndexError(
            "registration status element not found "
            "(selector '.td-regStatus-value > p')")
    zt = status_nodes[0].text.replace("\n", "").replace(" ", "")

    basics = soup.select('.basic-td > .c8 > .ng-binding ')
    # Indices 3 and 7 are read below, so at least eight fields are required.
    if len(basics) < 8:
        raise IndexError(
            "expected at least 8 basic-info fields "
            "(selector '.basic-td > .c8 > .ng-binding'), found %d"
            % len(basics))
    zzjgdm = basics[3].text
    tyshxydm = basics[7].text

    print(u'公司状态：' + zt)
    print(u'组织机构代码：' + zzjgdm)
    print(u'统一社会信用代码：' + tyshxydm)


if __name__ == '__main__':
    # Script entry point: fetch one company page, dump the text of its
    # <body>, then print the extracted basic information.
    target_url = "http://www.tianyancha.com/company/2310290454"
    page = get_content(driver_open(), target_url)
    print(page.body.text)
    print('----获取基础信息----')
    get_basic_info(page)
